####################################################
### code chunk number : A Real Data Example
####################################################
## 
## 
## 
## Start from an empty workspace (hidden objects included) and load the
## project helper functions used throughout this script.
rm(list = ls(all.names = TRUE))
source("subfunctions.R")

## Data set: the number of days of absence, taken from
## https://stats.idre.ucla.edu/r/dae/negative-binomial-regression/
library(foreign)
## Remote alternative to the local copy that is read below:
# dat <- read.dta("https://stats.idre.ucla.edu/stat/stata/dae/nb_data.dta")
dat <- read.dta("data/nb_data.dta")
dat

## Turn the program code (1, 2, 3) into a labelled factor and treat the
## identifier column as a factor as well.
dat$prog <- factor(
  dat$prog,
  levels = 1:3,
  labels = c("General", "Academic", "Vocational")
)
dat$id <- factor(dat$id)
summary(dat)

## Number of records in each program
sum(dat$prog == "General")
sum(dat$prog == "Academic")
sum(dat$prog == "Vocational")

## Response variable: the `daysabs` column of the data.
x <- dat$daysabs
range(x)
data_outline(x)
L_x <- length(x)
L_x
## The sample used for fitting is the first n = L_x - 1 observations; the
## last observation x[n + 1] is left out of x_1_n.
n <- L_x - 1
n

x_1_n <- x[1:n]
data_outline(x_1_n)
max_x_1_n <- max(x_1_n)
max_x_1_n

## Frequency vector: nn[k + 1] is the number of sample values equal to k,
## for k = 0, ..., max_x_1_n. It is printed once while still all zeros.
nn <- numeric(max_x_1_n + 1)
names(nn) <- 0:max_x_1_n
nn
m <- length(nn)
m
## `nn[] <-` fills the existing vector so that its names are kept.
nn[] <- vapply(
  seq_len(m) - 1,
  function(value) sum(x_1_n == value),
  numeric(1)
)
nn
## Total count over all cells
sum_nn <- sum(nn)
sum_nn

## Generate_Matrix_Latex for B
## A has two rows: the observed values 0, 1, ..., length(nn) - 1 and their
## frequencies nn. B stacks A in stripes of `cols_per_stripe` columns
## (value row, count row, value row, count row, ...).
## The value labels and the stripes are derived from nn instead of being
## hard-coded for 36 cells, so a different maximum no longer leads to
## silently recycled labels or a truncated table.
cols_per_stripe <- 12
A <- rbind(seq_along(nn) - 1, nn)
A
if (ncol(A) %% cols_per_stripe != 0) {
  stop(
    "The table layout needs the number of cells (", ncol(A),
    ") to be a multiple of ", cols_per_stripe, ".",
    call. = FALSE
  )
}
stripe_starts <- seq(1, ncol(A), by = cols_per_stripe)
B <- do.call(
  rbind,
  lapply(
    stripe_starts,
    function(first) A[, first:(first + cols_per_stripe - 1), drop = FALSE]
  )
)
B
Generate_Matrix_Latex(format(B, nsmall = 0))

## Frequency table of the values that actually occur in the sample
x_1_n_tab <- table(factor(x_1_n))
x_1_n_tab

# Histogram of the sample on a new graphics device, showing counts (not
# densities). The breaks -1, 0, ..., max(x_1_n) give every integer value its
# own bin. The figure is then saved as a PDF file.
dev.new()
hist(
  x_1_n,
  breaks = -1:max(x_1_n),
  freq = TRUE,
  main = paste("Histogram of", "x"),
  xlab = "x"
)
savePlot(filename = "figure/P-G_Histogram_x_1_n", type = "pdf")


## Compute theoretical probabilities
## Moment estimators
## alpha_1 and beta_1 are the two components returned by Moment_estimators()
## (a helper from subfunctions.R).
alpha_1_beta_1 <- Moment_estimators(x_1_n)
alpha_1_beta_1
round(alpha_1_beta_1, 3)
alpha_1 <- alpha_1_beta_1[1]
alpha_1
beta_1 <- alpha_1_beta_1[2]
beta_1

## Cell j = 1, ..., m - 1 corresponds to the value k = j - 1 and gets
##   P(X = k) = Gamma(k + alpha) * beta^alpha /
##              (k! * Gamma(alpha) * (1 + beta)^(k + alpha)).
## p_moment_1 uses this form directly; p_moment evaluates an algebraically
## equivalent form written with the beta function.
## p_moment and p_moment_1 are almost equal.
cells <- seq_len(m - 1)
p_moment <- numeric(m)
p_moment_1 <- numeric(m)
p_moment_1[cells] <- gamma(cells - 1 + alpha_1) * beta_1^alpha_1 /
  (gamma(cells) * gamma(alpha_1) * (1 + beta_1)^(cells - 1 + alpha_1))
p_moment[cells] <- gamma(cells - 1 + alpha_1) / gamma(cells + alpha_1) /
  beta(cells, alpha_1) / (1 + beta_1)^(cells - 1) *
  (beta_1 / (1 + beta_1))^alpha_1
## The last cell receives the remaining probability mass, so each vector
## sums to one.
p_moment[m] <- 1 - sum(p_moment)
p_moment_1[m] <- 1 - sum(p_moment_1)
p_moment
p_moment_1
p_moment - p_moment_1
sum(p_moment)
sum(p_moment_1)
## Expected counts n * p_moment next to the observed counts nn, followed by
## the per-cell terms (observed - expected)^2 / expected.
round(n * p_moment, 1)
nn
(nn - n * p_moment)^2 / (n * p_moment)

## Test
## r = 2 presumably is the number of estimated parameters (alpha, beta);
## confirm against Pearson_chisquare_test() in subfunctions.R.
df_2_moment <- Pearson_chisquare_test(nn = nn, p = p_moment, r = 2)
df_2_moment

## Compute_Estimators_PESLs_new
## The helper file is sourced again here (it is also sourced at the top of
## the script). x[n + 1], the observation left out of x_1_n, is passed as
## x_np1.
source("subfunctions.R")
res_moment <- Compute_Estimators_PESLs_new(
  alpha = alpha_1,
  beta = beta_1,
  x_np1 = x[n + 1]
)
res_moment
round(res_moment, 4)

## Compute the theoretical mean and variance of the data
E_V_moment <- Compute_mean_variance(alpha = alpha_1, beta = beta_1)
E_V_moment
data_outline(x_1_n)


## 
## When alpha and beta are unknown, and they are estimated by their MLEs.
## 

## Compute theoretical probabilities
## MLE
## The MLE is very sensitive to the initial guess. 
## The moment estimator is usually a good initial guess.
## NOTE(review): this section is labelled MLE, but the function handed to
## Newtons() is `moment_fun`; confirm in subfunctions.R that it encodes the
## likelihood equations.
res_Newtons <- Newtons(fun = moment_fun, p = alpha_1_beta_1, x = x_1_n)
alpha_2_beta_2 <- res_Newtons$root
alpha_2_beta_2
round(alpha_2_beta_2, 3)
alpha_2 <- alpha_2_beta_2[1]
alpha_2
beta_2 <- alpha_2_beta_2[2]
beta_2

## Cell j = 1, ..., m - 1 corresponds to the value k = j - 1. An equivalent
## closed form of the probability below is
##   gamma(k + alpha_2) * beta_2^alpha_2 /
##     (gamma(k + 1) * gamma(alpha_2) * (1 + beta_2)^(k + alpha_2)).
cells <- seq_len(m - 1)
p_MLE <- numeric(m)
p_MLE[cells] <- gamma(cells - 1 + alpha_2) / gamma(cells + alpha_2) /
  beta(cells, alpha_2) / (1 + beta_2)^(cells - 1) *
  (beta_2 / (1 + beta_2))^alpha_2
## The last cell receives the remaining probability mass.
p_MLE[m] <- 1 - sum(p_MLE)
p_MLE
sum(p_MLE)

## Test
## r = 2 presumably is the number of estimated parameters (alpha, beta);
## confirm against Pearson_chisquare_test() in subfunctions.R.
df_2_MLE <- Pearson_chisquare_test(nn = nn, p = p_MLE, r = 2)
df_2_MLE

## Compute_Estimators_PESLs_new
## x[n + 1], the observation left out of x_1_n, is passed as x_np1.
res_MLE <- Compute_Estimators_PESLs_new(
  alpha = alpha_2,
  beta = beta_2,
  x_np1 = x[n + 1]
)
res_MLE
round(res_MLE, 4)

## Compute the theoretical mean and variance of the data
E_V_MLE <- Compute_mean_variance(alpha = alpha_2, beta = beta_2)
E_V_MLE
data_outline(x_1_n)


## 
## H0: X ~ Poisson(theta)
## 
## When theta is unknown, and it is estimated by its moment estimator or MLE.
## 

## Compute theoretical probabilities
## Moment estimator, MLE
## Both estimates are set to the sample mean, so one value serves as
## theta, theta_1 and theta_2.
theta_2 <- mean(x_1_n)
theta_1 <- theta_2
theta <- theta_1
theta

## Cell j = 1, ..., m - 1 corresponds to the value k = j - 1 and gets
##   P(X = k) = theta^k * exp(-theta) / k!,   with k! = gamma(k + 1).
cells <- seq_len(m - 1)
p <- numeric(m)
p[cells] <- theta^(cells - 1) * exp(-theta) / gamma(cells)
## The last cell receives the remaining probability mass.
p[m] <- 1 - sum(p)
p
sum(p)

## Test
## r = 1 presumably is the number of estimated parameters (theta); confirm
## against Pearson_chisquare_test() in subfunctions.R.
df_1 <- Pearson_chisquare_test(nn = nn, p = p, r = 1)
df_1


